Subgroup Identification Analysis

GBSG Example

Author

Larry Leon

Code
# Global knitr chunk options: echo the code, suppress warnings and messages
# in the rendered output, and center figures drawn at retina resolution.
knitr::opts_chunk$set(
  echo = TRUE,
  warning = FALSE,
  message = FALSE,
  fig.align = "center",
  fig.retina = 2
)
# NOTE: the workspace is deliberately not cleared with `rm(list = ls())`.
# That call only removes visible objects (attached packages and options
# persist), so it does not give a clean session, and it can wipe the caller's
# workspace when the document is rendered into an existing environment.
# Render in a fresh R session instead.
library(tinytex)
Warning: package 'tinytex' was built under R version 4.5.2
Code
library(ggplot2)

#library(table1)

library(gt)

library(survival)
library(data.table)
library(randomForest)
library(grf)
library(policytree)
library(DiagrammeR)

#library(grid)
#library(forestploter)
#library(randomizr)

# library(devtools)
# install_github("larry-leon/weightedsurv", force = TRUE)
#install.packages("weightedsurv")
# install_github("larry-leon/forestsearch", force = TRUE)

library(forestsearch)
library(weightedsurv)

# Set theme for plots: make the minimal ggplot2 theme with a 12-point base
# font size the default for the ggplot2 figures in this document
theme_set(theme_minimal(base_size = 12))

1 Summary

Reproducing main GBSG analysis

1.1 Data setup

Code
# Analysis data set derived from `gbsg`, followed by the column-name vectors
# that are handed to the analysis functions below.
df.analysis <- gbsg
df.analysis <- within(df.analysis, {
  # Sequential numeric subject identifier; seq_len() stays empty for a
  # zero-row data set, whereas 1:nrow() would yield c(1, 0)
  id <- as.numeric(seq_len(nrow(df.analysis)))
  # Time to months (30.4375 = 365.25 / 12); assumes rfstime is recorded in
  # days -- TODO confirm against the data documentation
  time_months <- rfstime / 30.4375
  # 0/1 indicator for grade 3, compared numerically rather than through
  # implicit coercion to the string "3"
  grade3 <- ifelse(grade == 3, 1, 0)
  # Copy of the treatment indicator; note that treat.name below points at
  # the original `hormon` column, not at this copy
  treat <- hormon
})

# Column names used by the analysis functions
confounders.name <- c("age", "meno", "size", "grade3", "nodes", "pgr", "er")
outcome.name <- "time_months"
event.name <- "status"
id.name <- "id"
treat.name <- "hormon"

1.2 Kaplan-Meier curves and baseline summary

Code
# Prepare the input for the weighted Kaplan-Meier plot from the analysis
# data, using the outcome, event and treatment column names defined above.
# by.risk = 6 presumably spaces the risk table every 6 months -- confirm.
dfcount <- df_counting(
  df         = df.analysis,
  by.risk    = 6,
  tte.name   = outcome.name,
  event.name = event.name,
  treat.name = treat.name
)

# Kaplan-Meier curves with confidence intervals and the log-rank result
plot_weighted_km(
  dfcount,
  conf.int      = TRUE,
  show.logrank  = TRUE,
  ymax          = 1.05,
  xmed.fraction = 0.775,
  ymed.offset   = 0.125
)

Code
# Baseline characteristics by treatment arm: continuous and categorical
# variables are listed separately
create_summary_table(
  data = df.analysis,
  treat_var = treat.name,
  table_title = "GBSG Characteristics by Treatment Arm",
  vars_continuous = c("age", "nodes", "size", "er", "pgr"),
  vars_categorical = c("grade", "grade3"),
  font_size = 12
)
GBSG Characteristics by Treatment Arm
Characteristic Control (n=440) Treatment (n=246) P-value1 SMD2
age Mean (SD) 51.1 (10.0) 56.6 (9.4) <0.001 0.57
nodes Mean (SD) 4.9 (5.6) 5.1 (5.3) 0.665 0.03
size Mean (SD) 29.6 (14.4) 28.8 (14.1) 0.470 0.06
er Mean (SD) 79.7 (124.2) 125.8 (191.1) <0.001 0.30
pgr Mean (SD) 102.0 (170.0) 124.3 (249.7) 0.213 0.11
grade 0.273 0.06
1 48 (10.9%) 33 (13.4%)
2 281 (63.9%) 163 (66.3%)
3 111 (25.2%) 50 (20.3%)
grade3 0.174 0.05
0 329 (74.8%) 196 (79.7%)
1 111 (25.2%) 50 (20.3%)
1 P-values: t-test for continuous, chi-square/Fisher's exact for categorical/binary variables
2 SMD = Standardized mean difference (Cohen's d for continuous, Cramer's V for categorical)

1.3 GRF analysis

Code
## GRF
# Subgroup search on the analysis data with trees up to depth 2;
# details = TRUE prints the leaf summaries and the selected splits
grf_est1 <- grf.subg.harm.survival(
  data = df.analysis,
  confounders.name = confounders.name,
  outcome.name = outcome.name,
  event.name = event.name,
  id.name = id.name,
  treat.name = treat.name,
  maxdepth = 2,
  n.min = 60,
  dmin.grf = 12,
  frac.tau = 0.6,
  details = TRUE
)
tau, maxdepth = 46.75811 2 
   leaf.node control.mean control.size control.se depth
1          2         6.49        82.00       3.34     1
2          3        -4.10       604.00       1.06     1
11         4        -7.90       112.00       2.81     2
21         5         3.86       177.00       1.87     2
4          7        -5.89       356.00       1.33     2

Selected subgroup:
  leaf.node control.mean control.size control.se depth
1         2         6.49        82.00       3.34     1

GRF subgroup found
Terminating node at max.diff (sg.harm.id):
[1] "er <= 0"

All splits:
[1] "er <= 0"   "age <= 50" "age <= 43"
Code
# NOTE: In general for GRF trees
# leaf1 --> recommend control
# leaf2 --> recommend treatment
# Tree depth 1: plot the `tree1` element of the GRF result, labelling the
# leaves "Control" / "Treat" to match the convention above
plot(grf_est1$tree1,leaf.labels=c("Control","Treat"))
Code
# Tree depth 2: plot the `tree2` element of the GRF result with the leaves
# labelled "Control" / "Treat"
plot(grf_est1$tree2,leaf.labels=c("Control","Treat"))

1.4 Forestsearch with depth=2 (maxk = 2)

Code
# Set up parallel processing: register the doFuture and doRNG back ends
library(doFuture)
library(doRNG)

registerDoFuture()
registerDoRNG()

# ForestSearch with subgroups of at most two factors (maxk = 2). The call is
# wrapped in system.time() so that the run time is printed with the results.
system.time({
  fs <- forestsearch(
    df.analysis,
    confounders.name = confounders.name,
    # Column names: outcome, treatment, event indicator, subject id
    outcome.name = "time_months",
    treat.name = "hormon",
    event.name = "status",
    id.name = "id",
    # No potential-outcome column, test data or known-harm flag supplied
    potentialOutcome.name = NULL,
    df.test = NULL,
    flag_harm.name = NULL,
    # Hazard-ratio and consistency thresholds
    hr.threshold = 1.0,
    hr.consistency = 0.9,
    pconsistency.threshold = 0.90,
    sg_focus = "hr",
    max_subgroups_search = 30,
    use_twostage = TRUE,
    showten_subgroups = TRUE,
    details = TRUE,
    conf_force = NULL,
    # Candidate cuts: default cuts plus GRF-selected cuts, no lasso
    cut_type = "default",
    use_grf = TRUE,
    plot.grf = TRUE,
    use_lasso = FALSE,
    maxk = 2,
    fs.splits = 1000,
    # Minimum subgroup size and minimum events per arm
    n.min = 60,
    d0.min = 10,
    d1.min = 10,
    plot.sg = TRUE,
    by.risk = 6,
    parallel_args = list(plan = "callr", workers = 30, show_message = TRUE)
  )
})

=== Two-Stage Consistency Evaluation Enabled ===
Stage 1 screening splits: 30 
Maximum total splits: 1000 
Batch size: 20 
================================================

GRF stage for cut selection with dmin, tau = 12 0.6 
tau, maxdepth = 46.75811 2 
   leaf.node control.mean control.size control.se depth
1          2         6.49        82.00       3.34     1
2          3        -4.10       604.00       1.06     1
11         4        -7.90       112.00       2.81     2
21         5         3.86       177.00       1.87     2
4          7        -5.89       356.00       1.33     2

Selected subgroup:
  leaf.node control.mean control.size control.se depth
1         2         6.49        82.00       3.34     1

GRF subgroup found
Terminating node at max.diff (sg.harm.id):
[1] "er <= 0"

All splits:
[1] "er <= 0"   "age <= 50" "age <= 43"
GRF cuts identified: 3 
  Cuts: er <= 0, age <= 50, age <= 43 
# of continuous/categorical characteristics 5 2 
Continuous characteristics: age size nodes pgr er 
Categorical characteristics: meno grade3 
Default cuts included (1st 20) age <= mean(age) age <= median(age) age <= qlow(age) age <= qhigh(age) size <= mean(size) size <= median(size) size <= qlow(size) size <= qhigh(size) nodes <= mean(nodes) nodes <= median(nodes) nodes <= qlow(nodes) nodes <= qhigh(nodes) pgr <= mean(pgr) pgr <= median(pgr) pgr <= qlow(pgr) pgr <= qhigh(pgr) er <= mean(er) er <= median(er) er <= qlow(er) er <= qhigh(er) 
Categorical: meno grade3 
Factors per GRF: er <= 0 age <= 50 age <= 43 
Initial GRF cuts included er <= 0 age <= 50 age <= 43 

===== CONSOLIDATED CUT EVALUATION (IMPROVED) =====
Evaluating 25 cut expressions once and caching...
Cut evaluation summary:
  Total cuts:  25 
  Valid cuts:  25 
  Errors:  0 
✓ All 25 factors validated as 0/1
===== END CONSOLIDATED CUT EVALUATION =====

# of candidate subgroup factors= 25 
 [1] "er <= 0"      "age <= 50"    "age <= 43"    "age <= 53.1"  "age <= 53"   
 [6] "age <= 46"    "age <= 61"    "size <= 29.3" "size <= 25"   "size <= 20"  
[11] "size <= 35"   "nodes <= 5"   "nodes <= 3"   "nodes <= 1"   "nodes <= 7"  
[16] "pgr <= 110"   "pgr <= 32.5"  "pgr <= 7"     "pgr <= 131.8" "er <= 96.3"  
[21] "er <= 36"     "er <= 8"      "er <= 114"    "meno"         "grade3"      
Number of possible configurations (<= maxk): maxk = 2 , # combinations = 1275 
Events criteria: control >= 10 , treatment >= 10 
Subgroup search completed in 0.02 minutes
Found 77 subgroup candidate(s)
# of candidate subgroups (meeting all criteria) = 77 
Random seed set to: 8316951 
Removed 6 near-duplicate subgroups
Original rows: 77 
After removal: 71 
# of unique initial candidates: 71 
# Restricting to top stop_Kgroups = 30 
# of candidates to evaluate: 30 
Algorithm: Two-stage sequential 
  Stage 1 splits: 30 
  Screen threshold: 0.763 
  Max total splits: 1000 
  Batch size: 20 
Parallel processing: callr with 30 workers

*** Subgroup found: {er <= 0} !{age <= 43} 
% consistency criteria met= 1 
SG focus= hr 
Subgroup Consistency Minutes= 0.066 
Algorithm used: Two-stage sequential 
Candidates evaluated: 30 
Candidates passed: 13 
Subgroup found (FS) with sg_focus='hr'
Selected subgroup: {er <= 0} & !{age <= 43} 
Minutes forestsearch overall = 0.1 
Consistency algorithm used: twostage 
   user  system elapsed 
 27.081   1.896   5.845 
Code
# Reset the future plan to sequential processing
plan("sequential")

# Result tables for the estimation (training) data; which_df = "est" is
# also the default
res_tabs <- sg_tables(
  fs,
  ndecimals = 3,
  which_df = "est"
)

# Table of identified subgroups
res_tabs$sg10_out
Identified Subgroups
Two-factor subgroups (maxk=2)
Factor 1 Factor 2 N Events E1 HR Pcons
{er <= 0} !{age <= 43} 68 38 14 2.164 1.000
{er <= 0} {size <= 35} 61 34 15 2.537 0.990
{er <= 0} {pgr <= 32.5} 75 41 16 2.222 0.990
{er <= 0} {nodes <= 7} 61 31 11 2.335 0.970
{er <= 0} !{size <= 20} 61 35 12 2.054 0.970
{age <= 50} {pgr <= 7} 71 36 12 1.707 0.970
!{age <= 43} {age <= 50} 177 55 18 1.530 0.970
{er <= 0} 82 45 16 1.951 0.960
!{size <= 29.3} {er <= 8} 76 47 15 1.722 0.960
{er <= 0} {pgr <= 7} 64 34 13 1.992 0.950
{er <= 8} !{meno} 84 46 12 1.725 0.910
{grade3} {pgr <= 7} 72 39 13 1.710 0.910
!{size <= 25} {er <= 8} 89 55 17 1.553 0.900
Search Configuration: Single-factor candidates (L) = 50; Maximum combinations evaluated = 1,275; Search depth (maxk) = 2
Search Results: Candidate subgroups found = 77; Maximum HR estimate = 2.54
Note: E1 = events in treatment arm; Pcons = consistency proportion
Code
# Treatment-effect estimates table from the sg_tables() result
res_tabs$tab_estimates
Treatment Effect Estimates
Training data estimates
Subgroup n n1 events m1 m0 RMST HR (95% CI)
ITT 686 (100.0%) 246 (35.9%) 299 (43.6%) 66.3 50.2 7.8 0.69 (0.54, 0.89)
Questionable 68 (9.9%) 22 (32.4%) 38 (55.9%) 18.5 47.6 -15 2.16 (1.08, 4.35)
Recommend 618 (90.1%) 224 (36.2%) 261 (42.2%) 66.7 52.6 9.9 0.62 (0.48, 0.81)

1.5 Bootstrap Inference

Code
# Output directory for saved results
# (alternative: "dev/vignettes-working/applications/gbsg/results")
output_dir <- "results/"
# Results are written only when the output directory already exists
save_results <- dir.exists(output_dir)

# File-name prefixes for the saved bootstrap and cross-validation results
fileout_boot <- "gbsg-k2_v5_B=1000"
fileout_cv <- "gbsg-k2_v5_CV=200"

# patchwork is needed for the combined bootstrap plot (if it is not
# available, the combined plot is not produced)
library(patchwork)

# Number of bootstrap samples
NB <- 1000

# Bootstrap the fitted ForestSearch object; system.time() reports run time
system.time({
  fs_bc <- forestsearch_bootstrap_dofuture(
    fs.est = fs,
    nb_boots = NB,
    show_three = FALSE,
    details = TRUE
  )
})
Ystar matrix generated should be 'boots x N': 1000 x 686

ForestSearch parameters for bootstrap iterations:
  - sg_focus: hr 
  - maxk: 2 
  - fs.splits: 1000 
  - max_subgroups_search: 30 
  - hr.threshold: 1 
  - hr.consistency: 0.9 
  - pconsistency.threshold: 0.9 
  - n.min: 60 
  - use_twostage: TRUE 
  - use_lasso: FALSE 
  - use_grf: TRUE 
  Bootstrap-specific overrides:
  - grf_res: NULL (forces re-selection)
  - grf_cuts: NULL (forces re-selection)
  - parallel_args: sequential (prevents nested parallelism)
  - details: FALSE (suppressed in workers)
  - plot.sg: FALSE
  - plot.grf: FALSE

=== Bootstrap Analysis Complete ===
Success rate: 96.1% (961/1000)

H (Questionable) Estimates:
  Unadjusted:       2.16 (1.08,4.35) 
  Bias-corrected:  1.64 (0.73,3.69) 

Hc (Recommend) Estimates:
  Unadjusted:       0.62 (0.48,0.81) 
  Bias-corrected:  0.65 (0.45,0.94) 
===================================
     user    system   elapsed 
13210.452   240.624  1074.915 
Code
# Reset the future plan to sequential processing
plan("sequential")

# Save the analysis data, the ForestSearch fit and the bootstrap results,
# provided the output directory exists
if (save_results) {
  filename <- file.path(output_dir, paste0(fileout_boot, ".RData"))
  save(df.analysis, fs, fs_bc, file = filename)
  cat("\nResults saved to:", filename, "\n")
}

Results saved to: results//gbsg-k2_v5_B=1000.RData 

1.5.1 Diagnostics and Summaries

Code
# Alternative source of previously saved results:
# load("~/Documents/GitHub/forestsearch/vignettes/results/sim_gbsg_example_B=1000.RData")

output_dir <- "results/"

# Reload the saved bootstrap results when the output directory exists
load_results <- dir.exists(output_dir)
if (load_results) {
  filename <- file.path(output_dir, paste0(fileout_boot, ".RData"))
  load(file = filename)
}

# Summarize the bootstrap results on the hazard-ratio scale, with plots
summaries <- summarize_bootstrap_results(
  sgharm = fs$sg.harm,
  boot_results = fs_bc,
  create_plots = TRUE,
  est.scale = "hr"
)

===============================================================
           BOOTSTRAP ANALYSIS SUMMARY                          
===============================================================

BOOTSTRAP SUCCESS METRICS:
-------------------------------------------------------------
  Total iterations:              1000
  Successful subgroup ID:        961 (96.1%)
  Failed to find subgroup:       39 (3.9%)

TIMING ANALYSIS:
-------------------------------------------------------------
Overall:
  Total bootstrap time:          17.89 minutes (0.30 hours)
  Average per iteration:         0.02 min (1.1 sec)

Per-iteration timing:
  Mean:                          0.24 min (14.5 sec)
  Median:                        0.24 min (14.2 sec)
  Std Dev:                       0.09 minutes
  Range:                         [0.05, 0.57] minutes
  IQR:                           [0.17, 0.31] minutes

ForestSearch timing (successful iterations only):
  Iterations with FS:            1000 (100.0%)
  Mean FS time:                  0.24 min (14.5 sec)
  Median FS time:                0.24 min (14.2 sec)
  Total FS time:                 241.64 minutes
  FS time % of total:            1351.0%

Overhead timing (Cox models, bias correction, etc.):
  Mean overhead:                 0.00 min (0.0 sec)
  Median overhead:               0.00 min (0.0 sec)
  Total overhead:                0.24 minutes
  Overhead % of total:           1.3%

PERFORMANCE ASSESSMENT:
-------------------------------------------------------------
  Performance rating:            ✓✓✓ Excellent
  Average iteration speed:       1.1 seconds

===============================================================
Code
# Extract the `table` element of the bootstrap summary
sg_tab <- summaries$table

# Display the table
sg_tab
Treatment Effect by Subgroup
Bootstrap bias-corrected estimates (1000 iterations)
Subgroup
Sample Size
Survival
Treatment Effect
N NT Events MedT MedC RMSTd HR
(95% CI)1
HR
(95% CI)2
Qstnbl 68 (9.9%) 22 (32.4%) 38 (55.9%) 18.5 47.6 -15 2.16 (1.08, 4.35) 1.64 (0.73,3.69)
Recmnd 618 (90.1%) 224 (36.2%) 261 (42.2%) 66.7 52.6 9.9 0.62 (0.48, 0.81) 0.65 (0.45,0.94)
1 Unadjusted HR: Standard Cox regression hazard ratio with robust standard errors
2 Bias-corrected HR: Bootstrap-adjusted estimate using infinitesimal jacknife method (1000 iterations). Corrects for optimism in subgroup selection.
Note: Med = Median survival time (months). RMSTd = Restricted mean survival time difference. Subgroup identified in 96.1% of bootstrap samples.
Code
# Summarize event counts in the bootstrap results against a threshold of 12
event_summary <- summarize_bootstrap_events(fs_bc, threshold = 12)

=== Bootstrap Event Count Summary ===
Total bootstrap iterations: 1000
Event threshold: <12 events

ORIGINAL Subgroup H on BOOTSTRAP samples:
  Control arm <12 events: 0 (0.0%)
  Treatment arm <12 events: 0 (0.0%)
  Either arm <12 events: 0 (0.0%)

ORIGINAL Subgroup Hc on BOOTSTRAP samples:
  Control arm <12 events: 0 (0.0%)
  Treatment arm <12 events: 0 (0.0%)
  Either arm <12 events: 0 (0.0%)

NEW Subgroups found: 961 (96.1%)

NEW Subgroup H* on ORIGINAL data:
  Control arm <12 events: 35 (3.6% of successful)
  Treatment arm <12 events: 81 (8.4% of successful)
  Either arm <12 events: 113 (11.8% of successful)

NEW Subgroup Hc* on ORIGINAL data:
  Control arm <12 events: 0 (0.0% of successful)
  Treatment arm <12 events: 0 (0.0% of successful)
  Either arm <12 events: 0 (0.0% of successful)
Code
# Display the bootstrap diagnostics table from the summary object
summaries$diagnostics_table_gt
Bootstrap Diagnostics Summary
Analysis of 1000 bootstrap iterations
Category Metric Value
Success Rate1 Total iterations 1000
Successful subgroup ID 961 (96.1%)
Failed to find subgroup 39 (3.9%)
Success rating Excellent ✓✓✓
Subgroup H (Questionable) Unadjusted estimate 2.16 (1.08, 4.35)
Bias-corrected estimate 1.64 (0.73, 3.69)
Bias correction impact2 24.2%
CI width change3 3.27 -> 2.96
Subgroup Hc (Recommend) Unadjusted estimate 0.62 (0.48, 0.81)
Bias-corrected estimate 0.65 (0.45, 0.94)
Bias correction impact2 4.2%
CI width change3 0.33 -> 0.49
Bootstrap Quality: H Valid iterations 961
Mean (SD) 0.49 (0.49)
Coefficient of variation4 98.2%
Skewness5 -0.09
Bootstrap Quality: Hc Valid iterations 961
Mean (SD) -0.43 (0.21)
Coefficient of variation4 49.7%
Skewness5 0.23
Search Performance Mean max HR found 3.19 (1.27)
Mean factors evaluated 47.7
Mean combinations tried 1168
Proportion at maxk --
1 Success Rate: Proportion of bootstrap samples where ForestSearch identified a valid subgroup
2 Bias Correction Impact: Percentage change from unadjusted to bias-corrected estimate
3 CI Width Change: Confidence interval width before -> after bias correction
4 Coefficient of Variation: Standard deviation as % of mean (lower is better)
5 Skewness: Measure of asymmetry (0 = symmetric, |skew| < 1 is generally good)
Interpretation Guide:

Excellent stability: Subgroup is consistently identified across bootstrap samples.

High variability: Bootstrap estimates are imprecise (CV >= 25%). Consider increasing nb_boots or sample size.

Code
# Agreement between the bootstrap subgroups and the original subgroup
summaries$subgroup_summary$original_agreement
                            Metric       Value
                            <char>      <char>
1:      Total bootstrap iterations        1000
2:           Successful iterations         961
3: Failed iterations (no subgroup)          39
4:       Exact match with original 146 (15.2%)
5:         Different from original 815 (84.8%)
Code
# Count and percentage of appearances per factor in the bootstrap subgroups
summaries$subgroup_summary$factor_presence
  Rank Factor Count  Percent
2    1     er   591 61.49844
6    2    pgr   354 36.83663
7    3   size   308 32.04995
1    4    age   245 25.49428
5    5  nodes   140 14.56816
3    6 grade3   131 13.63163
4    7   meno   130 13.52758
Code
# Same presence summary, broken down by specific factor definition (cut)
summaries$subgroup_summary$factor_presence_specific
    Rank Base_Factor Factor_Definition Count  Percent
149    1          er         {er <= 0}   282 29.34443
168    2          er         {er <= 8}   129 13.42352
171    3      grade3          {grade3}   128 13.31946
77     4        meno           !{meno}   126 13.11134
Code
# Display the combined bootstrap plot stored in the summary object
summaries$plots$combined

1.6 Forest Search n-fold cross-validation

Code
# Reload the saved bootstrap results when the output directory exists
output_dir <- "results/"
load_results <- dir.exists(output_dir)
if (load_results) {
  filename <- file.path(output_dir, paste0(fileout_boot, ".RData"))
  load(file = filename)
}

# Kfolds is not supplied, so the default n-fold cross-validation is used

# Start from an empty result before running the cross-validation
fs_OOB <- NULL

fs_OOB <- forestsearch_Kfold(
  fs.est = fs,
  details = TRUE,
  parallel_args = list(plan = "callr", workers = 36, show_message = TRUE)
)
Cross-validation setup:
  - Observations: 686 
  - Folds: 686 
  - Fold sizes (range): 1-1 

ForestSearch parameters for CV folds:
  - sg_focus: hr 
  - maxk: 2 
  - fs.splits: 1000 
  - max_subgroups_search: 30 
  - hr.threshold: 1 
  - hr.consistency: 0.9 
  - pconsistency.threshold: 0.9 
  - n.min: 60 
  - use_twostage: TRUE 
  - use_lasso: FALSE 
  - use_grf: TRUE 
  - (per-fold parallel: sequential)
  - (per-fold details: FALSE)
  - (per-fold plot.sg: FALSE)

Cross-validation complete:
  - Time: 13.5 minutes
  - Subgroup found in 100 % of folds
Any found: 1 
Exact match: 0.8746356 
At least 1 match: 0.9752187 
Cov 1 any: 0.9985423 
Cov 2 any: 0.8746356 
Cov 1 and 2 any: 0.8746356 
Cov 1 exact: 0.9752187 
Cov 2 exact: 0.8746356 
Agreement (sens, ppv) in H and Hc: 0.8529412 0.9822006 0.8405797 0.9837925 
Code
# Reset workers to single
plan(sequential)

summary_OOB <- forestsearch_KfoldOut(res=fs_OOB, details=TRUE, outall=TRUE)
Any found: 1 
Exact match: 0.8746356 
At least 1 match: 0.9752187 
Cov 1 any: 0.9985423 
Cov 2 any: 0.8746356 
Cov 1 and 2 any: 0.8746356 
Cov 1 exact: 0.9752187 
Cov 2 exact: 0.8746356 
Agreement (sens, ppv) in H and Hc: 0.8529412 0.9822006 0.8405797 0.9837925 
        Subgroup        n              n1            m1     m0     RMST  
Overall "ITT"           "686 (100.0%)" "246 (35.9%)" "66.3" "50.2" "7.8" 
FA_0    "Not recommend" "68 (9.9%)"    "22 (32.4%)"  "18.5" "47.6" "-15" 
KfA_0   "Not recommend" "69 (10.1%)"   "18 (26.1%)"  "27.2" "42.9" "-2.6"
FA_1    "Recommend"     "618 (90.1%)"  "224 (36.2%)" "66.7" "52.6" "9.9" 
KfA_1   "Recommend"     "617 (89.9%)"  "228 (37.0%)" "66.3" "52.6" "7.5" 
        Hazard ratio       
Overall "0.69 (0.54, 0.89)"
FA_0    "2.16 (1.08, 4.35)"
KfA_0   "1.13 (0.52, 2.47)"
FA_1    "0.62 (0.48, 0.81)"
KfA_1   "0.68 (0.52, 0.88)"
Code
# Frequency of each definition in the first column of SGs_found
table(summary_OOB$SGs_found[,1])

   !{age <= 43} !{size <= 29.3} !{size <= 29.4}       {er <= 0}       {er <= 8} 
            145              10               1             524               5 
       {grade3} 
              1 
Code
# Frequency of each definition in the second column of SGs_found
table(summary_OOB$SGs_found[,2])

   !{age <= 43} !{size <= 29.3}       {er <= 0}       {er <= 8}     {pgr <= 32} 
            455               5             145              11               3 
    {pgr <= 33}      {pgr <= 7} 
             53              11 
Code
# Number of repetitions of the 10-fold cross-validation
Ksims <- 200

fs_ten <- forestsearch_tenfold(
  fs.est = fs,
  sims = Ksims,
  Kfolds = 10,
  details = TRUE,
  parallel_args = list(plan = "callr", workers = 36, show_message = TRUE)
)
Starting repeated K-fold cross-validation:
  - Simulations: 200 
  - Folds per simulation: 10 
  - Workers: 13 

ForestSearch parameters for CV folds:
  - sg_focus: hr 
  - maxk: 2 
  - fs.splits: 1000 
  - max_subgroups_search: 30 
  - hr.threshold: 1 
  - hr.consistency: 0.9 
  - pconsistency.threshold: 0.9 
  - n.min: 60 
  - use_twostage: TRUE 
  - use_lasso: FALSE 
  - use_grf: TRUE 
  - (per-fold parallel: sequential)
  - (per-fold details: FALSE)
  - (per-fold plot.sg: FALSE)

Repeated K-fold CV complete:
  - Time: 28.68 minutes
  - Successful simulations: 200 / 200 
  - Projected hours per 100 sims: 0.24 
Code
# Reset workers to single
plan(sequential)

print(fs_ten$find_summary)
       Any      Exact At least 1       Cov1       Cov2  Cov 1 & 2 Cov1 exact 
       0.9        0.1        0.5        0.8        0.2        0.1        0.5 
Cov2 exact 
       0.1 
Code
# Print the sens_summary element (sens/ppv for H and Hc) of the result
print(fs_ten$sens_summary)
   sens_H   sens_Hc     ppv_H    ppv_Hc 
0.6029412 0.9498382 0.5656334 0.9560618 
Code
# First rows of sens_out (presumably one row per simulation -- confirm)
print(head(fs_ten$sens_out))
        sens_H   sens_Hc     ppv_H    ppv_Hc
[1,] 0.5588235 0.9530744 0.5671642 0.9515347
[2,] 0.5294118 0.9433657 0.5070423 0.9479675
[3,] 0.7352941 0.9498382 0.6172840 0.9702479
[4,] 0.5294118 0.9126214 0.4000000 0.9463087
[5,] 0.5294118 0.9385113 0.4864865 0.9477124
[6,] 0.4705882 0.9352751 0.4444444 0.9413681
Code
# First rows of find_out (presumably one row per simulation -- confirm)
print(head(fs_ten$find_out))
     Any Exact At least 1 Cov1 Cov2 Cov 1 & 2 Cov1 exact Cov2 exact
[1,] 0.9   0.2        0.6  0.7  0.3       0.2        0.5        0.3
[2,] 0.9   0.0        0.4  0.8  0.0       0.0        0.4        0.0
[3,] 1.0   0.2        0.7  0.8  0.3       0.2        0.6        0.3
[4,] 1.0   0.1        0.5  0.6  0.3       0.1        0.3        0.3
[5,] 0.9   0.0        0.3  0.8  0.1       0.1        0.3        0.0
[6,] 0.9   0.2        0.5  0.7  0.4       0.2        0.3        0.4
Code
# Save all results (analysis data, ForestSearch fit, bootstrap and both
# cross-validation results) when the output directory exists
output_dir <- "results/"
save_results <- dir.exists(output_dir)

if (save_results) {
  filename <- file.path(output_dir, paste0(fileout_cv, ".RData"))
  save(df.analysis, fs, fs_bc, fs_ten, fs_OOB, file = filename)
  cat("\nResults saved to:", filename, "\n")
}

Results saved to: results//gbsg-k2_v5_CV=200.RData 
Code
# Reload the saved cross-validation results when the output directory exists
output_dir <- "results/"
load_results <- dir.exists(output_dir)
if (load_results) {
  filename <- file.path(output_dir, paste0(fileout_cv, ".RData"))
  load(file = filename)
}

# Define subgroups to display: each entry gives the subset expression, the
# display name and the subgroup type
subgroups <- list(
  age_gt65 = list(
    subset_expr = "age > 65",
    name = "age > 65",
    type = "reference"
  ),
  age_lt65 = list(
    subset_expr = "age <= 65",
    name = "age <= 65",
    type = "reference"
  ),
  pgr_positive = list(
    subset_expr = "pgr > 0",
    name = "pgr > 0",
    type = "reference"
  ),
  pgr_negative = list(
    subset_expr = "pgr <= 0",
    name = "pgr <= 0",
    type = "reference"
  )
)

# Create the forest plot from the ForestSearch fit, the bootstrap results
# and both cross-validation results
result <- plot_subgroup_results_forestplot(
  fs_results = list(
    fs.est = fs,
    fs_bc = fs_bc,
    fs_OOB = fs_OOB,
    fs_kfold = fs_ten
  ),
  df_analysis = df.analysis,
  subgroup_list = subgroups,
  outcome.name = "time_months",
  event.name = "status",
  treat.name = "hormon",
  E.name = "Hormon",
  C.name = "CT",
  ci_column_spaces = 25
)

# Display the plot
plot(result$plot)